- Notifications
You must be signed in to change notification settings - Fork 31
/
Copy path187. Repeated DNA Sequences.c
108 lines (97 loc) · 2.7 KB
/
187. Repeated DNA Sequences.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
187. Repeated DNA Sequences
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",
Return:
["AAAAACCCCC", "CCCCCAAAAA"].
*/
/**
* Return an array of size *returnSize.
* Note: The returned array must be malloced, assume caller calls free().
*/
typedefstructsn {
//char *s;
intfound;
unsigned intc;
structsn*shadow;
} sn_t;
#defineHF 1000
intmap[26] = { 0 };
unsigned intcode(constchar*s) {
inti, c;
//c = 5381;
c=0;
for (i=0; i<10; i++) {
// c = c * 33 + s[i];
c= (c << 2) | map[s[i] -'A'];
}
returnc;
}
char**findRepeatedDnaSequences(constchar*s, int*returnSize) {
sn_t*patt[HF] = { 0 }, *e, *shadow;
unsigned intc;
char**p, *buff;
constchar*x;
intsz, n, len;
map['A'-'A'] =0x0;
map['C'-'A'] =0x1;
map['G'-'A'] =0x2;
map['T'-'A'] =0x3;
sz=10; n=0;
p=malloc(sz*sizeof(char*));
//assert(p);
len=strlen(s);
while (*s&&len >= 10) {
c=code(s);
e=patt[c % HF];
while (e&&e->c!=c) {
e=e->shadow;
}
if (!e) { // not yet being searched
e=malloc(sizeof(sn_t));
//assert(e);
//e->s = s;
e->found=0;
e->c=c;
e->shadow=patt[c % HF]; // save in searched table
patt[c % HF] =e;
} elseif (!e->found) { // this is a repeated pattern
e->found=1;
if (n==sz) {
sz *= 2;
p=realloc(p, sz*sizeof(char*));
//assert(p);
}
buff=malloc(11*sizeof(char));
//assert(buff);
strncpy(buff, s, 10);
buff[10] =0;
p[n++] =buff;
}
s++; len--;
}
*returnSize=n;
for (n=0; n<HF; n++) {
e=patt[n];
while (e) {
shadow=e->shadow;
free(e);
e=shadow;
}
}
returnp;
}
/*
Difficulty:Medium
Total Accepted:78.5K
Total Submissions:249.6K
Companies LinkedIn
Related Topics Hash Table Bit Manipulation
*/